/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.discovery;

import java.util.Arrays;

import mosdi.fa.Alphabet;
import mosdi.util.BitArray;
import mosdi.util.Iupac;
import mosdi.util.IupacStringConstraints;

/** Class that calculates bounds on the expectation and expected clump size 
 *  of (constrained) IUPAC patterns. */
public class IidBoundCalculator {
	private int[] pattern;
	private double[] iupacCharProbs;
	// table with conditional probabilities:
	// iupacCharCondiditionalProbs[char1][char2] := P(char1 | char2)
	private double[][] iupacCharConditionalProbs;
	private int validUpTo;
	private IupacStringConstraints[] remainingConstraints;
	private double[] prefixExpectation;
	// for each iupac letter class, the minimal/maximal probability of 
	// a letter in each class.
	private double[] minTable;
	private double[] maxTable;
	
	private final Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
	private final Alphabet iupacAlphabet = Alphabet.getIupacAlphabet();

	/** Constructor.
	 *  @param pattern IUPAC pattern under consideration. 
	 */
	public IidBoundCalculator(int[] pattern, double[] characterDistribution, IupacStringConstraints constraints) {
		this.pattern = pattern;
		validUpTo = 0;
		remainingConstraints = new IupacStringConstraints[pattern.length+1];
		remainingConstraints[0] = constraints;
		prefixExpectation = new double[pattern.length+1];
		prefixExpectation[0] = 1.0;
		// initialize letter class probability tables
		minTable = new double[4];
		maxTable = new double[4];
		iupacCharProbs = new double[iupacAlphabet.size()];
		Arrays.fill(minTable, 1.0);
		for (int c=0; c<iupacAlphabet.size(); ++c) {
			BitArray genChar = Iupac.iupacCharToBitArray(dnaAlphabet, iupacAlphabet.get(c));
			double p = 0.0;
			for (int i=0; i<genChar.size(); ++i) {
				if (!genChar.get(i)) continue;
				p+=characterDistribution[i];
			}
			iupacCharProbs[c] = p;
			int charClass = Iupac.getCharacterMultiplicity(c)-1;
			if (minTable[charClass]>p) minTable[charClass]=p;
			if (maxTable[charClass]<p) maxTable[charClass]=p;
		}
		// initialize conditional probability table
		iupacCharConditionalProbs = new double[iupacAlphabet.size()][iupacAlphabet.size()];
		for (int c1=0; c1<iupacAlphabet.size(); ++c1) {
			BitArray genChar1 = Iupac.iupacCharToBitArray(dnaAlphabet, iupacAlphabet.get(c1));
			for (int c2=0; c2<iupacAlphabet.size(); ++c2) {
				BitArray intersection = Iupac.iupacCharToBitArray(dnaAlphabet, iupacAlphabet.get(c2));
				intersection.and(genChar1);
				double p = 0.0;
				for (int i=0; i<intersection.size(); ++i) {
					if (!intersection.get(i)) continue;
					p+=characterDistribution[i];
				}
				iupacCharConditionalProbs[c1][c2] = p/iupacCharProbs[c2];
			}
		}
	}
	
	/** Updates the tables remainingConstraints and prefixExpectation up to a given
	 *  prefix length.
	 */
	private void updatePrefixData(int prefixLength) {
		while (validUpTo < prefixLength) { // be lazy
			int k = validUpTo;
			prefixExpectation[k+1] = prefixExpectation[k] * iupacCharProbs[pattern[k]];
			remainingConstraints[k+1] = remainingConstraints[k].minus(pattern[k]);
			++validUpTo;
		}
	}
	
	/**
	 * Calculate lower bound for expectation for a pattern composed of the
	 * prefix pattern[0..prefixLength-1] followed by an arbitrary suffix that
	 * complies with the constraints 
	 * 
	 * @param prefixLength an index of the range [0,...,pattern.length]
	 */
	public double getExpectationLowerBound(int prefixLength) {
		updatePrefixData(prefixLength);
		double p = prefixExpectation[prefixLength];
		int[] minFrequencies = remainingConstraints[prefixLength].getMinFrequencies();
		int[] maxFrequencies = remainingConstraints[prefixLength].getMaxFrequencies();
		int n = pattern.length - prefixLength;
		for (int i=0; i<4; ++i) {
			if (minFrequencies[i]==0) continue;
			p*=Math.pow(minTable[i],minFrequencies[i]);
			n-=minFrequencies[i];
		}
		for (int i=0; (i<4)&&(n>0); ++i) {
			if (maxFrequencies[i]==0) continue;
			int k = Math.min(maxFrequencies[i], n);
			p*=Math.pow(minTable[i],k);
			n-=k;
		}		
		if (n!=0) throw new IllegalStateException();
		return p;
	}

	/**
	 * Calculate upper bound for expectation for a pattern composed of the
	 * prefix pattern[0..prefixLength-1] followed by an arbitrary suffix that
	 * complies with the constraints 
	 * 
	 * @param prefixLength an index of the range [0,...,pattern.length]
	 */
	public double getExpectationUpperBound(int prefixLength) {
		updatePrefixData(prefixLength);
		double p = prefixExpectation[prefixLength];
		int[] minFrequencies = remainingConstraints[prefixLength].getMinFrequencies();
		int[] maxFrequencies = remainingConstraints[prefixLength].getMaxFrequencies();
		int n = pattern.length - prefixLength;
		for (int i=0; i<4; ++i) {
			if (minFrequencies[i]==0) continue;
			p*=Math.pow(maxTable[i],minFrequencies[i]);
			n-=minFrequencies[i];
		}
		for (int i=3; (i>=0)&&(n>0); --i) {
			if (maxFrequencies[i]==0) continue;
			int k = Math.min(maxFrequencies[i], n);
			p*=Math.pow(maxTable[i],k);
			n-=k;
		}		
		if (n!=0) throw new IllegalStateException();
		return p;
	}
	
	/** Calculate upper bound for the expected clump size of a pattern
	 *  composed of the prefix pattern[0..prefixLength-1] followed by 
	 *  an arbitrary suffix that complies with the constraints.
	 */
	public double getExpectedClumpSizeUpperBound(int prefixLength) {
		double pTotal = 0.0;
		int[] maxFrequencies = remainingConstraints[prefixLength].getMaxFrequencies();
		int[] minFrequencies = remainingConstraints[prefixLength].getMinFrequencies();
		for (int shift=1; shift<pattern.length; ++shift) {
			double p = 1.0;
			// 1) take known prefix into account
			for (int i=0; i<prefixLength; ++i) {
				if (i+shift<prefixLength) {
					// character overlaps another known character
					// ==> use conditional probability
					p*=iupacCharConditionalProbs[pattern[i]][pattern[i+shift]];
				} else {
					p*=iupacCharProbs[pattern[i]];
				}
				if (p==0.0) break;
			}
			// 2) take (part of the) unknown suffix into account
			if (p>0.0) {
				int k = Math.max(0, shift-prefixLength);
				for (int i=0; (i<4)&&(k>0); ++i) {
					if (minFrequencies[i]==0) continue;
					int j = Math.min(minFrequencies[i], k);
					p*=Math.pow(maxTable[i],j);
					k-=j;
				}
				for (int i=3; (i>=0)&&(k>0); --i) {
					if (maxFrequencies[i]==0) continue;
					int j = Math.min(maxFrequencies[i], k);
					p*=Math.pow(maxTable[i],j);
					k-=j;
				}		
				if (k!=0) throw new IllegalStateException();
			}
			pTotal+=p;
			if (pTotal>=1.0) return Double.POSITIVE_INFINITY;
		}
		return 1.0/(1.0-pTotal); 
	}
	
	/** Returns the expectation of the pattern's prefix. */
	public double getPrefixExpectation(int prefixLength) {
		updatePrefixData(prefixLength);
		return prefixExpectation[prefixLength];
	}

	/** Modify pattern. */
	public void setPatternPosition(int position, int newChar) {
		if (pattern[position]==newChar) return;
		validUpTo = Math.min(validUpTo,position); 
		pattern[position] = newChar;
	}
	
}
